1. Connect to PostgreSQL to import the tables created into R.
con <- DBI::dbConnect(RPostgres::Postgres(),dbname = "wefarmtestdb",user = rstudioapi::askForPassword("Database user"),password = rstudioapi::askForPassword("Database password"),port = "5432")
  1. View the list of tables
dbListTables(con)
## [1] "target_group"      "target_group_user" "impression"       
## [4] "member"            "member_post"
  1. Finding the click through value.
SELECT
    Content,
    impression.keyword,
    COUNT(*) AS click_through_value
FROM
    target_group
INNER JOIN impression
    ON target_group.keyword = impression.keyword
GROUP BY content,impression.keyword
ORDER BY impression.keyword;
print(click_through_value)
##                                                                                                                                                            content
## 1                            Wefarm online is here! Join farming groups, connect with other farmers & share tips to help each other succeed. Join now we.fm/sms/i1
## 2                           Be known as a model farmer in your area of farming by answering your fellow farmers' questions on Wefarm online. Join now we.fm/sms/i3
## 3 Farming is hard work! We need to help each other succeed. Wefarm online is a farmer-only community connecting farmers from all over Kenya. Join now we.fm/sms/i5
## 4       What did you do on your farm today? share your daily activities on Wefarm online to inspire others & to keep track of your progress. Join now we.fm/sms/i6
##   keyword click_through_value
## 1 /sms/i1                  28
## 2 /sms/i3                  22
## 3 /sms/i5                  33
## 4 /sms/i6                  26
  1. Finding the join value
SELECT
    target_group_id,
    COUNT (*) AS join_value
FROM target_group_user
INNER JOIN member
    ON user_phone_number = member_phone_number
WHERE target_group_id 
    IN(1,3,5,6)
GROUP BY target_group_id;
print(join_value)
##   target_group_id join_value
## 1               1          9
## 2               3          1
## 3               5          3
## 4               6          1
  1. Finding the contribution value
SELECT 
    target_group_id,
    COUNT(*) AS contribution_value
FROM member_post
INNER JOIN target_group_user
    ON member_post.member_phone_number = target_group_user.user_phone_number
WHERE target_group_id
    IN(1,3,5,6)
GROUP BY target_group_id;
print(contribution_value)
##   target_group_id contribution_value
## 1               5                  1
  1. Finding the Click Through Rate
SELECT
    target_group_id,
    COUNT (*) AS target_number
FROM target_group_user
WHERE target_group_id
    IN(1,3,5,6)
GROUP BY target_group_id
ORDER BY target_group_id;
click_through_df <- rbind("Target number" =target_group_user$target_number,"Click value"=click_through_value$click_through_value,"Join value" =join_value$join_value)
colnames(click_through_df) <- c("Variation 1", "Variation 3" , "Variation 5", "Variation 6")
click_through_rate <- mapply('/', click_through_df[2,],click_through_df[1,])*100
click_through_rate
## Variation 1 Variation 3 Variation 5 Variation 6 
##    1.866667    1.465690    2.198534    1.733333
  1. Finding the join rate
join_rate <- mapply('/', click_through_df[3,],click_through_df[2,])*100
join_rate
## Variation 1 Variation 3 Variation 5 Variation 6 
##   32.142857    4.545455    9.090909    3.846154
  1. Plotting the join rate and click rate
click_join_df <- as.data.frame(t(rbind (click_through_rate, join_rate)))
click_join_df <- rownames_to_column(click_join_df, var="Variation") %>% head
click_join_df_long <- gather(click_join_df, key="Measure", value="Value", c("click_through_rate", "join_rate"))
plot1 <- click_join_df_long %>%
  arrange(Value) %>%
  mutate(Variation=factor(Variation, levels=rev(unique(Variation)), ordered=TRUE)) %>%
  ggplot( aes(fill=Measure, x=Variation, y=Value))+
  geom_bar(position = "dodge", stat = "identity", width=0.4) +
  ggtitle("Click Through Rate vs Join Rate")+
  xlab("Test Group")+
  ylab("Rate")
ggplotly(plot1)

From the plot, we see that variation 5 (2.1985) has a better click through rate than variation 1 (1.86667). We also see that variation 1 (32.143) has the best join rate from the sample.

Next we are going to test the null hypothesis i.e the generic message (variation 1) would have had a lower click through rate and join rate than the user-based value propositions (other variations in the test), then further test for the statistical significance of the result.

chisq.test(click_join_df$join_rate,click_join_df$click_through_rate)
## Warning in chisq.test(click_join_df$join_rate,
## click_join_df$click_through_rate): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  click_join_df$join_rate and click_join_df$click_through_rate
## X-squared = 12, df = 9, p-value = 0.2133

A chi-squared test statistic at 95% confidence level returns p-values for both click rate and join rate that are greater than 0.05 (i.e 0.2133 > 0.05). This indicates that there isnโ€™t enough evidence to to reject the null hypothesis thus we fail to reject the null hypothesis and conclude that the test result is not statistically significant at this point.